package com.wtw.test

import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark job that prints, for every province, the three ads with the highest click count.
 *
 * Each input line is expected to contain five space-separated fields:
 * `timestamp province city user ad`. Only the province (index 1) and the
 * ad (index 4) are used.
 */
object TopK {

  /**
   * Runs the job on a local Spark master and prints one
   * `(province, List((ad, clicks), ...))` line per province, ordered by province.
   *
   * @param args optional; `args(0)` overrides the input path. When absent the
   *             bundled `src/main/resources/agent.log` is read.
   */
  def main(args: Array[String]): Unit = {
    // Forward slashes resolve on every platform; the previous backslash form only worked on Windows.
    val defaultInputPath = "src/main/resources/agent.log"
    // How many ads to keep per province.
    val topN = 3
    // Number of space-separated fields a well-formed record must have.
    val expectedFieldCount = 5

    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("practice")
    val sc = new SparkContext(sparkConf)

    try {
      val inputPath = args.headOption.getOrElse(defaultInputPath)
      val lines = sc.textFile(inputPath)

      // ((province, ad), clicks)
      // Blank or truncated lines are dropped up front; indexing them would otherwise
      // fail the whole job with ArrayIndexOutOfBoundsException.
      val clicksByProvinceAndAd = lines
        .map(_.split(" "))
        .filter(_.length >= expectedFieldCount)
        .map(fields => ((fields(1), fields(4)), 1))
        .reduceByKey(_ + _)

      // (province, Iterable[(ad, clicks)])
      // The data is already aggregated per (province, ad), so each group holds
      // at most one entry per distinct ad.
      val adsByProvince = clicksByProvinceAndAd
        .map { case ((province, ad), clicks) => (province, (ad, clicks)) }
        .groupByKey()

      // Keep only the most-clicked ads of each province, highest count first.
      val topAdsByProvince = adsByProvince.mapValues { ads =>
        ads.toList.sortBy(_._2)(Ordering[Int].reverse).take(topN)
      }

      topAdsByProvince.sortByKey().collect().foreach(println(_))
    } finally {
      // Always release the context, even when a stage fails.
      sc.stop()
    }
  }
}
